
import urllib.request as ur
import lxml.etree as le



# # Request the URL
# request = ur.Request(url='https://edu.csdn.net/')
# # Read the page
# response = ur.urlopen(request).read()
# # Save the page
# with open('edu1.html','wb') as f:
#     f.write(response)

#======

# XPath for first-level category names: the text of every <a> inside an <h3>
# that sits in a direct child <div> of a <div class="classify_c">.
# NOTE(review): despite the "url" name this is an XPath expression, and it is
# not referenced in the visible part of this script.
url1 = '//div[@class="classify_c"]/div/h3/a/text()'

# XPath for second-level category names: the text of every <a> inside a <span>
# nested two <div> levels below a <div class="classify_c">.
url2 = '//div[@class="classify_c"]/div/div/span/a/text()'

# Read the saved page into memory. Only the read happens inside the `with`
# block so the file handle is released before any parsing or printing starts.
with open('edu.html', 'r', encoding='utf-8') as f:
    html = f.read()

# Parse the HTML text into an lxml element tree that can be queried with XPath.
html_x = le.HTML(html)

# Every <div class="classify_c"> is treated as one category block.
div_x_s = html_x.xpath('//div[@class="classify_c"]')

# One dict per category block: {'category1': str, 'category2': list of str}.
data_s = []

for div_x in div_x_s:
    # First-level label: text of the heading link, relative to this block.
    category1_s = div_x.xpath('./div/h3/a/text()')
    if not category1_s:
        # A block without a heading link would make `[0]` raise IndexError
        # and abort the whole run; skip that block instead.
        continue
    category1 = category1_s[0]

    # Second-level labels: all link texts under this block's <span> elements.
    category2 = div_x.xpath('./div/div/span/a/text()')

    # Collect the first-level label together with its second-level labels.
    data_s.append(
        dict(
            category1=category1,
            category2=category2,
        )
    )

# Print each first-level category followed by its indented sub-categories.
for data in data_s:
    print(data['category1'])
    for category2 in data['category2']:
        print('  *', category2)














